{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn import preprocessing\n",
    "import matplotlib.pyplot as plt \n",
    "from sklearn.cluster import KMeans\n",
    "from sklearn.metrics import silhouette_samples, silhouette_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(14999, 10)\n",
      "['satisfaction_level', 'last_evaluation', 'number_project', 'average_montly_hours', 'time_spend_company', 'Work_accident', 'left', 'promotion_last_5years', 'sales', 'salary']\n"
     ]
    }
   ],
   "source": [
    "# Load the HR dataset; the first row of the CSV holds the column names.\n",
    "hr_raw = pd.read_csv('data/hr.csv', header=0)\n",
    "\n",
    "# Drop every row that has a missing value. The raw frame stays available as\n",
    "# hr_raw; hr_data is the cleaned frame used by the following cells.\n",
    "hr_data = hr_raw.dropna()\n",
    "\n",
    "# Report the cleaned frame's (rows, columns) and its column names.\n",
    "print(hr_data.shape)\n",
    "print(list(hr_data.columns))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['satisfaction_level', 'last_evaluation', 'number_project',\n",
       "       'average_montly_hours', 'time_spend_company', 'Work_accident', 'left',\n",
       "       'promotion_last_5years', 'salary_high', 'salary_low', 'salary_medium',\n",
       "       'sales_IT', 'sales_RandD', 'sales_accounting', 'sales_hr',\n",
       "       'sales_management', 'sales_marketing', 'sales_product_mng',\n",
       "       'sales_sales', 'sales_support', 'sales_technical'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# One-hot encode the listed columns; each becomes a set of <column>_<value> indicators.\n",
    "dummy_cols = ['salary', 'sales']\n",
    "data_trnsf = pd.get_dummies(hr_data, columns=dummy_cols)\n",
    "\n",
    "# Show the resulting column names as the cell output.\n",
    "data_trnsf.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'algorithm': 'auto',\n",
       " 'copy_x': True,\n",
       " 'init': 'k-means++',\n",
       " 'max_iter': 300,\n",
       " 'n_clusters': 3,\n",
       " 'n_init': 10,\n",
       " 'n_jobs': 1,\n",
       " 'precompute_distances': 'auto',\n",
       " 'random_state': None,\n",
       " 'tol': 0.0001,\n",
       " 'verbose': 0}"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "n_clusters = 3\n",
    "\n",
    "# Fix the seed so centroid initialisation, and therefore the cluster numbering,\n",
    "# is the same on every Restart & Run All.\n",
    "kmeans = KMeans(n_clusters=n_clusters, random_state=42)\n",
    "\n",
    "# fit_predict fits the model and returns each row's cluster index in one call;\n",
    "# a separate fit() beforehand would only repeat the same work.\n",
    "cluster_labels = kmeans.fit_predict(data_trnsf)\n",
    "\n",
    "# NOTE(review): data_trnsf is clustered without scaling, so wide-ranged columns\n",
    "# (e.g. average_montly_hours) probably dominate the distance -- consider\n",
    "# standardising the features before fitting; confirm against the centroids.\n",
    "\n",
    "# Display the estimator's hyper-parameters as the cell output.\n",
    "kmeans.get_params()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[6.75324232e-01 7.28759954e-01 3.82161547e+00 2.02696701e+02\n",
      "  3.41501706e+00 1.71786121e-01 6.05233220e-02 2.38907850e-02\n",
      "  9.67007964e-02 4.67121729e-01 4.36177474e-01 7.73606371e-02\n",
      "  5.09670080e-02 4.73265074e-02 4.73265074e-02 5.05119454e-02\n",
      "  5.62002275e-02 6.07508532e-02 2.73037543e-01 1.51535836e-01\n",
      "  1.84982935e-01]\n",
      " [5.92360893e-01 7.78814655e-01 4.37284483e+00 2.58326607e+02\n",
      "  3.76763323e+00 1.32053292e-01 3.31700627e-01 1.99843260e-02\n",
      "  7.56269592e-02 4.91379310e-01 4.32993730e-01 8.79702194e-02\n",
      "  5.44670846e-02 5.05485893e-02 4.76097179e-02 3.85971787e-02\n",
      "  5.56426332e-02 5.83855799e-02 2.74882445e-01 1.45768025e-01\n",
      "  1.86128527e-01]\n",
      " [5.81896364e-01 6.47789091e-01 3.25945455e+00 1.46582364e+02\n",
      "  3.31472727e+00 1.34545455e-01 2.93090909e-01 2.03636364e-02\n",
      "  7.74545455e-02 5.00909091e-01 4.21636364e-01 7.96363636e-02\n",
      "  5.18181818e-02 5.47272727e-02 5.23636364e-02 3.83636364e-02\n",
      "  5.94545455e-02 6.12727273e-02 2.79454545e-01 1.48909091e-01\n",
      "  1.74000000e-01]]\n",
      "[2 1 1 ... 2 1 2]\n",
      "For n_clusters = 3 The average silhouette_score is : 0.581903917044546\n"
     ]
    }
   ],
   "source": [
    "# Pull the fitted model's per-row cluster indices and cluster centres.\n",
    "labels = kmeans.labels_\n",
    "centroid = kmeans.cluster_centers_\n",
    "\n",
    "# Centres first, then the label vector.\n",
    "print(centroid)\n",
    "print(labels)\n",
    "\n",
    "# Mean silhouette coefficient over all rows for the labels returned by fit_predict.\n",
    "silhouette_avg = silhouette_score(\n",
    "    data_trnsf,\n",
    "    cluster_labels,\n",
    ")\n",
    "print(\"For n_clusters =\", n_clusters, \"The average silhouette_score is :\", silhouette_avg)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
